pckage <- c("polycor", "ggplot2", "gridExtra", "texreg", "car",
            "AER", "survey", "sandwich", "MASS", "lmtest", 
            "weights", "Hmisc", "xtable", "WeightIt")
lapply(pckage, require, character.only=T)

#Set WD
setwd("C:/Users/Matias/Dropbox/Proyectos/Corruption and Political Knoweldge/Replication File")

#Open data with both waves merged in wide format
load("merged_ola_1y2.Rdata")
ls()
dim(ola)

###################################################
############ SCALE AND VARIABLE CREATION ##########
###################################################

#PK
pk_o1 = data.frame(with(ola, cbind(pk1_o1, pk2_o1, pk3_o1, pk4_o1, pk5_o1)))
pk_o2 = data.frame(with(ola, cbind(pk1_o2, pk2_o2, pk3_o2, pk4_o2, pk5_o2)))
ola$f1 <- apply(pk_o1, 1, sum)
ola$f2 <- apply(pk_o2, 1, sum)
table(ola$f2, exclude=NULL)

ola$pol_trst_o1 <- apply(with(ola, cbind(trParlamen_o1, trGob_o1, trPartidos_o1,
                                         trJueces_o1,trFiscal_o1)), 1, mean, na.rm=T)
ola$pol_trst_o2 <- apply(with(ola, cbind(trParlamen_o2, trGob_o2, trPartidos_o2,
                                         trJueces_o2, trFiscal_o2)), 1, mean, na.rm=T)


#Subset of Goods - DVD, TV cable, refrigerador, lavadora automática, secadora de ropa,
#calefón, computador 
ola$bienes_o1 <- with(ola, apply(
  cbind(P83_1, P83_2, P83_3, P83_4, P83_5, P83_7, P83_10), 1, sum, na.rm=T))
barplot(table(ola$bienes_o1))

#Religion recoded
levels(ola$religid_o1) <- list(Catholic="Católica", 
                               Evangélical="Evangélica, pentecostal",
                               Evangélical="Protestante, protestante tradicional (luterana, anglicana, etc.)",
                               Other="Otra religión o credo",
                               Other="Judía", Other="Ortodoxa", Other="Musulmana",
                               Other="Mormona, Iglesia de los Santos de los Últimos Días",
                               Other="Testigo de Jehová",
                               Non_religious="Ninguna, ateo(a), agnóstico(a)")
ola$religid_o1[is.na(ola$religid_o1)] <- "Non_religious"
table(ola$religid_o1, exclude = NULL)

#Percepcion Evolucion Delincuencia
ola$delin_evol <- as.numeric(ola$P15_5)
ola$delin_evol[is.na(ola$delin_evol)] <- mean(ola$delin_evol, na.rm=T)

#Dias que lee noticias
ola$diasDia_o1m <- ola$diasDia_o1
ola$diasDia_o1m[is.na(ola$diasDia_o1)] <- mean(ola$diasDia_o1, na.rm=T)
  
#Replace missing case of educa with mode category (3)
ola$educa_o1Fm <- ola$educa_o1F
ola$educa_o1Fm[is.na(ola$educa_o1F)] <- "Secondary"

#Non response in voted is not voted
ola$voto_2013_o1m <- ola$voto_2013_o1
ola$voto_2013_o1m[is.na(ola$voto_2013_o1)] <- 0

#Responds Wave 2
ola$attrition <- as.numeric(!is.na(ola$SEXO_PS.y))
summary(ola$POND2[ola$attrition==1])

###################################################
#####  Model for probability of Attrition #########
###################################################

model_att <- glm(attrition  ~ SEXO_PS.x + EDAD_PS.x + I(EDAD_PS.x^2) +
                  educa_o1Fm + sit_lab_o1 + religid_o1 + voto_2013_o1m + diasDia_o1m +
                   delin_evol + bienes_o1, data=ola, family=binomial)
summary(fitted(model_att))
summary(1/fitted(model_att))

screenreg(model_att, override.se = sqrt(diag(vcovHC(model_att))), 
          override.pvalues = (1-pnorm(abs(model_att$coef / sqrt(diag(vcovHC(model_att))))))*2,
          custom.coef.names = c("Intercept", "Sex (Female=1)", "Age", "Age^2", "Ed. Secondary",
                                "Ed. Tertiary Techical", "Ed. Tertiary Universitary", 
                                "Currently works", "Retired / Pensioner", "Evangelical",
                                "Other Religion", "Irreligious", "Voted in 2013", 
                                "Days read newspaper or online", "Percepcion Evolution of Crime",
                                "Houshold Goods Index"), single.row = T)
anova(model_att, test="LRT")

#Combine postratification (sex and age) and longitudinal weights
ola$POND4 <- ola$POND3 * (1/fitted(model_att))
#Rescale so it has average = 1
ola$POND4 <- ola$POND4/mean(ola$POND4)
summary(ola$POND4)

#Trimm Weight
ola$Pond4trim <- trim(ola$POND4, at = 1)
summary(ola$Pond4trim)
plot(ola$POND4, ola$Pond4trim)

#------Level of corruption between waves-------###
#Weighted T test for Corruption
t.test(ola$perpcorrup_o2, ola$perpcorrup_o1)
wtd.t.test(ola$perpcorrup_o2, ola$perpcorrup_o1, weight=ola$POND4)
t.test(ola$perpcorrupE_o2, ola$perpcorrupE_o1)
wtd.t.test(ola$perpcorrupE_o2, ola$perpcorrupE_o1, weight=ola$POND4)


##############################################################
#----TABLE B1 OF ONLINE SUPPLEMENT: SUMMARY STATISTICS-----###
##############################################################

lr <- data.frame(model.matrix(~ -1 + lrscaleR_o1, data=ola))
names(lr) <- c("Left-wing Ideology W1", "Centrist Ideology W1", "Right-Wing Ideology W1",
               "Non Ideologues")

ola$educa_o1F[is.na(ola$educa_o1F)] <- ola$educa_o2F[is.na(ola$educa_o1F)]
ed <- data.frame(model.matrix(~ -1 + educa_o1F, data=ola, na.action=na.pass))
names(ed) <- c("Primary Ed. W1", "Secondary Ed. W1", "Tertiary Technical Ed. W1", 
               "Tertiary Universitary Ed. W1")

#Create Common Table
sum_vars <- with(ola, data.frame(corrup_comp_o1, corrup_comp_o2, f1, f2, 
                                 lr, pol_trst_o1, pol_trst_o2,
                                 sex=(as.numeric(SEXO_PS.x)-1), EDAD_PS.x, ed, POND4))
sum_vars <- cbind(sum_vars, pk_o1, pk_o2)                            

#Summary Table with weights
sum.func <- function(x, w) {
  cbind(table(is.na(x))[1],
        mean(x, na.rm=T), sd(x, na.rm=T),
        weighted.mean(x,w, na.rm=T), sqrt(wtd.var(x,w, na.rm=T)),
        min(x, na.rm=T), quantile(x, probs = 0.25, na.rm=T), 
        quantile(x, probs = 0.75, na.rm=T), max(x, na.rm=T))
}

sum.func(lr[,3], ola$POND4)

sum_data <- data.frame(t(
  sapply(1:dim(sum_vars)[2], 
         function(x) sum.func(sum_vars[ola$attrition==1, x], 
                              ola$POND4[ola$attrition==1]))))

rownames(sum_data) <- c("Percived Corruption W1", "Percived Corruption W2", 
                   "Political Knowledge Index W1", "Political Knowledge Intex W2", 
                   names(lr), "Political Trust W1", "Political Trust W2",
                   "Sex (female=1) W1", "Age W1", 
                   names(ed), "Weight",
                   paste0("Political Knowledge Item ", 1:5, " W1"),
                   paste0("Political Knowledge Item ", 1:5, " W2"))

#Add column with census data from CEPAL/CELADE Redatam+SP 02-02-2022
sum_data$census <- 0

colnames(sum_data) <- c("N", "Mean", "St. Dev.", "Weighted Mean", "Weighted St. Dev.",
                        "Min", "Pctl(25)", "Pctl(75)", "Max", "Census 2017")

#Introduce Censsus data
sum_data$`Census 2017`[which(row.names(sum_data)=="Sex (female=1) W1")] <- 0.52
sum_data$`Census 2017`[which(row.names(sum_data)=="Age W1")] <- 43.83
sum_data$`Census 2017`[which(row.names(sum_data)=="Primary Ed. W1")] <- .18
sum_data$`Census 2017`[which(row.names(sum_data)=="Secondary Ed. W1")] <- .45
sum_data$`Census 2017`[which(row.names(sum_data)=="Tertiary Technical Ed. W1")] <- .1
sum_data$`Census 2017`[which(row.names(sum_data)=="Tertiary Universitary Ed. W1")] <- .27

print(xtable(sum_data, digits=c(0, 0, rep(2, length(colnames(sum_data))-1)),
             align=c("l","c","c","c", "p{1.5cm}","p{1.5cm}","c","c","c","c", "c")),
      scalebox=.75, table.placement="H")


##########################################################################
##-------FIGURE C1: POLITICAL KNOWLEDGE BY LAGGED PERCIEVED CORRUPTION--##
##########################################################################

fig2a <- ggplot(data=ola, aes(corrup_comp_o1, f2)) + 
  geom_jitter() + geom_smooth(se=F, method=lm) + theme_bw() + 
  labs(x=expression(Percieved~Corruption[t-1]), 
       y=expression(Political~Knowledge[t]),
       title="A) Political Knowledge by Lagged Percieved Corruption") + 
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    text = element_text(size = 14),
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )


#Change lrscaleR_o1 labels
ola$ideo_group_o1 <- ola$lrscaleR_o1
levels(ola$ideo_group_o1) <- c("Left-wing", "Center", "Right-wing", "Non-ideologues")

fig2b <- ggplot(data=ola, aes(corrup_comp_o1, f2)) + 
  geom_jitter() + geom_smooth(se=F, method=lm) + 
  facet_wrap(~ideo_group_o1) + theme_bw() + 
  labs(x=expression(Percieved~Corruption[t-1]), 
       y=expression(Political~Knowledge[t]),
       title="B) Political Knowledge by Lagged Percieved Corruption \nand Ideological Preferences") + 
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    text = element_text(size = 14),
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15),
    strip.text = element_text(size = 14)
  )


#Save
fig2 <- grid.arrange(fig2a, fig2b, ncol=2)
#Save
ggsave("FigureC1.png", plot=fig2, width=16, height=8, dpi=300)



################################################################################
####-- TABLE D2: HALF-LONGITUDINAL MEDIATION MODELS FOR POLITICAL KNOWLEDGE--###
################################################################################

#Dummies variables for Education
mis_ed <- which(is.na(ola$educa_o1F))
ola$educa_o1F[is.na(ola$educa_o1F)] <- ola$educa_o2F[is.na(ola$educa_o1F)]
ed <- data.frame(model.matrix(~ -1 + educa_o1F, data=ola, na.action=na.pass))
names(ed) <- c("Primary_Ed", "Secondary_Ed", "Technical_Ed", "Universitary_Ed")
summary(ed)
ola <- cbind(ola, ed)
table(ola$educa_o1F, ola$Primary_Ed)
table(ola$educa_o1F, ola$Secondary_Ed)
ola$Primary_Ed[mis_ed] <- NA
ola$Secondary_Ed[mis_ed] <- NA
ola$educa_o1F[mis_ed] <- NA

#Variables Estandarizadas
stdz <- function(x) (x - mean(x, na.rm=T)) / sd(x, na.rm=T)

ola_sd <- data.frame(corrup_comp_o1=stdz(ola$corrup_comp_o1), 
                     corrup_comp_o2=stdz(ola$corrup_comp_o2), 
                     pol_trst_o1 = stdz(ola$pol_trst_o1),
                     pol_trst_o2 = stdz(ola$pol_trst_o2),
                     f1 = stdz(ola$f1), f2 = stdz(ola$f2),
                     Center = stdz(ola$Center), Right = stdz(ola$Right), 
                     None = stdz(ola$None), SEXO_PS.x = stdz(as.numeric(ola$SEXO_PS.x)),
                     EDAD_PS.x = stdz(ola$EDAD_PS.x), 
                     Secondary_Ed = stdz(ola$Secondary_Ed),
                     Technical_Ed = stdz(ola$Technical_Ed),
                     Universitary_Ed = stdz(ola$Universitary_Ed))

#------Mediation Model for Political Knowledge ------#

mediation_pk_sem = '

#Mediator
Pol_Trst =~ pol_trst_o1

#Mediaiton model
Pol_Trst ~ a * corrup_comp_o1 + f1 + Center + Right + None + SEXO_PS.x + EDAD_PS.x + Secondary_Ed + Technical_Ed + Universitary_Ed

#Outcome Model
f2 ~ c * corrup_comp_o1 + f1 + b*Pol_Trst + Center + Right + None + SEXO_PS.x + EDAD_PS.x + Secondary_Ed + Technical_Ed + Universitary_Ed

#Direct effect
direct := c

#Indirect effect
indirect := a*b

#Total effect
total := c + (a*b)
'

#SEM  
pk_sem_t1 = sem(mediation_pk_sem, se="boot", data=subset(ola, !is.na(corrup_comp_o2)))
pk_sem_t1_sd = sem(mediation_pk_sem, se="boot", data=subset(ola_sd, !is.na(corrup_comp_o2)))

pk_sem_t2 = sem(mediation_pk_sem, se="boot", data=subset(ola, !is.na(corrup_comp_o2)))
pk_sem_t2_sd = sem(mediation_pk_sem, se="boot", data=subset(ola_sd, !is.na(corrup_comp_o2)))

summary(pk_sem_t1, fit.measures=TRUE, standardized=TRUE)

semTable(list("Political Trust t1"=pk_sem_t1, "Standardized t1"=pk_sem_t1_sd,
              "Political Trust t2"=pk_sem_t2, "Standardized t2"=pk_sem_t2_sd),
         file="Table4.tex",
         columns = list("Political Trust t1"=c("estsestars"), "Standardized t1"=c("est"),
                        "Political Trust t2"=c("estsestars"), "Standardized t2"=c("est")),
         paramSets=c("slopes", "constructed", "fits"),
         fits = c("cfi", "rmsea", "aic", "bic", "ntotal"),
         columnLabels = list(estse="Estimate (Std.Err.)"),
         print.results = F, type="tex", 
         varLabels=c("Pol_Trst"="Political Trust", "corrup_comp_o1"="Percieved Corruption_{t-1}",
                     "f1" = "Political Knowledge_{t-1}", "f2" = "Political Knowledge_{t}",
                     "Center" = "Center_{t-1}", "Right" = "Right_{t-1}", 
                     "None"="No Ideology_{t-1}", "SEXO_PS.x"="Female_{t-1}",
                     "EDAD_PS.x"="Age", "Secondary_Ed"="Secondary Ed_{t-1}",
                     "Technical_Ed"="Tertiary Technical Ed_{t-1}",
                     "Universitary_Ed"="Tertiary Universitary Ed_{t-1}",
                     "direct"="Direct Effect", "indirect"="Indirect Effect",
                     "total"="Total Effect"),
         alpha = c(0.1, 0.05, 0.01))


############################################################################
####-- TABLE E3: CROSS-LAGGED MODELS WITH FURTHER CONTROL VARIABLES---######
############################################################################

#Not voted because being not old enough is not voted
ola$voto_2013_o1m <- ola$voto_2013_o1
ola$voto_2013_o1m[ola$P52_3=="No tenía edad para votar"] <- 0

m1e <- lm(f2 ~ SEXO_PS.x + EDAD_PS.x + educa_o1F + lrscaleR_o1 +  corrup_comp_o1 + f1 + 
            polint_o1 + AtenPol_o1 + diasDia_o1 + voto_2013_o1 + marcha_o1 +
            tamred_o1 + aprobacion_o1, data=ola, subset=!is.na(corrup_comp_o2))
m1f <- lm(corrup_comp_o2 ~ SEXO_PS.x + EDAD_PS.x + educa_o1F + lrscaleR_o1 + corrup_comp_o1 + f1 + 
            polint_o1 + AtenPol_o1 + diasDia_o1 + voto_2013_o1 + marcha_o1 +
            tamred_o1 + aprobacion_o1, data=ola)
m1g <- lm(f2 ~ SEXO_PS.x + EDAD_PS.x + educa_o1F + lrscaleR_o1 +  corrup_comp_o1 + f1 + 
            pol_trst_o1 + polint_o1 + AtenPol_o1 + diasDia_o1 + voto_2013_o1 + marcha_o1 +
            tamred_o1 + aprobacion_o1, data=ola, subset=!is.na(corrup_comp_o2))
m1h <- lm(corrup_comp_o2 ~ SEXO_PS.x + EDAD_PS.x + educa_o1F + lrscaleR_o1 + corrup_comp_o1 + f1 + 
            pol_trst_o1 + polint_o1 + AtenPol_o1 + diasDia_o1 + voto_2013_o1 + marcha_o1 +
            tamred_o1 + aprobacion_o1, data=ola)

se_m1e <- sqrt(diag(vcovHC(m1e, type="HC0")))
pval_m3e <- (1-pnorm(abs(m1e$coef / se_m1e)))*2
se_m1f <- sqrt(diag(vcovHC(m1f, type="HC0")))
pval_m3f <- (1-pnorm(abs(m1f$coef / se_m1f)))*2
se_m1g <- sqrt(diag(vcovHC(m1g, type="HC0")))
pval_m3g <- (1-pnorm(abs(m1g$coef / se_m1g)))*2
se_m1h <- sqrt(diag(vcovHC(m1h, type="HC0")))
pval_m3h <- (1-pnorm(abs(m1h$coef / se_m1h)))*2

texreg(l=list(m1e, m1f, m1g, m1h), override.se = list(se_m1e, se_m1f, se_m1g, se_m1h),
       override.pvalues = list(pval_m3e, pval_m3f, pval_m3g, pval_m3h),
       custom.coef.map = list("(Intercept)"="Intercept", 
                              "corrup_comp_o1"="Percieved Corruption_{t-1}",
                              "f1"="Political Knowledge_{t-1}",
                              "SEXO_PS.xMujer"="Female_{t-1}",
                              "EDAD_PS.x"="Age_{t-1}",
                              "educa_o1FSecondary"="Secondary Ed._{t-1}", 
                              "educa_o1FTertiary Technical"="Tertiary Technical Ed._{t-1}", 
                              "educa_o1FTertiary Universitary"="Tertiary Universitary Ed._{t-1}",
                              "lrscaleR_o1Center"="Center_{t-1}", 
                              "lrscaleR_o1Right"="Right-wing_{t-1}",
                              "lrscaleR_o1None"="No Ideology_{t-1}", 
                              "polint_o1" = "Interest in Politics_{t-1}",
                              "AtenPol_o1"="Attention to Political News_{t-1}",
                              "diasDia_o1"="Days read news on paper or online_{t-1}",
                              "voto_2013_o1"="Electoral participation in 2013",
                              "marcha_o1"="Frequency of Participation in Protests_{t-1}",
                              "tamred_o1"="Political discussion network size_{t-1}",
                              "aprobacion_o1"="Government Approval_{t-1}",
                              "pol_trst_o1"="Political Trust_{t-1}"),
       custom.model.names = rep(c("Pol. Know_{t}", "Per. Corrup_{t}"),2),
       caption="Cross Lagged Panel Models with Further Control Variables",
       booktabs = T, dcolumn = T, use.packages = F, float.pos="H", scalebox=.8,
       caption.above=T, stars=c(0.01, 0.05, 0.1))
